Bulk RNASeq data from Mayo Clinic.
# gaussian to run the linear regression
pheno_list = c("Braak" = "gaussian",
"thal" = "gaussian"
# "diagnosis" = "binomial" # AD x CT
)work_dir = "/pastel/Github_scripts/SpeakEasy_dlpfc/bulk_dlpfc/replication/"
resources = "/pastel/resources/MayoClinic/RNAseq/"
net_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_net_MF/"
library(tidyverse)
library(janitor)
library(WGCNA)We already have the modules from our ROSMAP dataset, so I’ll use this as gene list and calculate the eigengenes and module average expression for the same genes for Mayo. Then, we’ll run regressions with their AD-related covariates to check for associations.
metadata = read.csv(paste0(resources, "RNAseq_Harmonization_Mayo_combined_metadata.csv"))
metadata$ageDeath = as.numeric(gsub("\\+", "", metadata$ageDeath))
message(paste0("Unique individualID: ", length(unique(metadata$individualID)))) ## Unique individualID: 355
## Unique specimenID: 597
##
## cerebellum temporal cortex
## 278 319
To avoid repeated measures.
# metadata
metadata_temp = metadata[metadata$exclude == "FALSE", ] # Remove the samples that didn't pass QC
# metadata_temp = metadata_temp[metadata_temp$diagnosis %in% c("Alzheimer Disease", "control"), ] # We filter to get only AD and CT
metadata_sel = metadata_temp[metadata_temp$tissue == "temporal cortex", ] # select the temporal cortex
# expression
expr_mtx = read.table(paste0(resources, "Mayo_Normalized_counts_CQN.tsv"), check.names = F, header = T)
rownames(expr_mtx) = expr_mtx$feature
expr_mtx_sel = expr_mtx[, colnames(expr_mtx) %in% metadata_sel$specimenID] # select the same samples from the metadata Bulk DLPFC.
modules_file = read.table(paste0(net_dir, "geneBycluster.txt"), header = T)
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")
too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
net_output = modules_file %>% filter(! cluster_lv3 %in% as.integer(too_small)) # the clusters with at least 30 nodes
net_output = net_output[,c("ensembl", "cluster_lv3", "gene_type", "symbol")]
net_output$ensembl2 = gsub("(.*)\\.(.*)", "\\1",net_output$ensembl)
message(paste0("Number of modules: ", length(unique(net_output$cluster_lv3))))## Number of modules: 34
I got our gene list and calculated the ME for Mayo Clinic expression.
gene_mod_map = data.frame(ensembl = rownames(expr_mtx_sel))
gene_mod_map = gene_mod_map %>% inner_join(net_output[,-1], by = c("ensembl" = "ensembl2"))
expr_mtx_sel_ord = expr_mtx_sel[gene_mod_map$ensembl,] # order the matrix
identical(rownames(expr_mtx_sel_ord), gene_mod_map$ensembl) # must be TRUE ## [1] TRUE
colors_mod = gene_mod_map$cluster_lv3
expr_mtx_sel_ord_t = as.data.frame(t(expr_mtx_sel_ord))
external_ME = moduleEigengenes(expr_mtx_sel_ord_t, colors = colors_mod)
mod_average = external_ME$averageExpr # data.frame
# save results
# save(external_ME, metadata_sel, file = paste0(work_dir, "Mayo_moduleEigengenes_lv3.Rdata"))
# write.table(external_ME$eigengenes, file = paste0(work_dir, "Mayo_moduleEigengenes_lv3.txt"), sep = "\t", quote = F, row.names = T)
# write.table(external_ME$averageExpr, file = paste0(work_dir, "Mayo_moduleaverageExpr_lv3.txt"), sep = "\t", quote = F, row.names = T)
createDT(mod_average)Input: Average expression by module.
Numbers and colors : -log10(nominal pvalue)
cutpoints:
< 0.001 = ***
0.01 = **
0.05 = *
0.1 = .
1 = ” ”
data4linear_reg <- mod_average # match codes name
phenotype_dt = metadata_sel[match(rownames(data4linear_reg), metadata_sel$specimenID), ]
all(rownames(data4linear_reg) == phenotype_dt$specimenID) # Must be TRUE. Match IDs## [1] TRUE
# hist(phenotype_dt$Braak)
message(paste0("Unique individualID: ", length(unique(phenotype_dt$individualID)))) ## Unique individualID: 258
## Unique specimenID: 258
res_test = run_module_trait_association(data4linear_reg, phenotype_dt, pheno_list, covariates = c("ageDeath","sex")) # covariates to adjust
matrix_rsquared = res_test$matrix_rsquared
matrix_pvalue = res_test$matrix_pvalue
### Get modules >= 30 nodes to show:
to_show = paste0("AE",as.character( modules_size$module[modules_size$n_nodes >= 30]))
# pdf("/pastel/Github_scripts/SpeakEasy_dlpfc/figures4paper/reg_Mayo_adjcov.pdf", width = 2.5, height = 11)
plot_module_trait_association_heatmap(res_test, to_show)Top result by covariate.
Threshold: At least one module with adjusted pvalue < 0.05.
Method: Bonferroni by column.
plot_module_trait_association_heatmap(res_test, to_show, show_only_significant = T, signif_cutoff = 0.05)## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
plot_module_trait_association_heatmap(res_test, to_show, show_only_significant = T, signif_cutoff = 0.000001)## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## Adjusting P-values by Bonferroni correction by each phenotype separately. Double check if rows of res_test$matrix_pvalue are phenotypes and columns are your features (e.g. modules)
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] RColorBrewer_1.1-3 circlize_0.4.16 ComplexHeatmap_2.15.4
## [4] performance_0.12.2 lmerTest_3.1-3 lme4_1.1-35.1
## [7] Matrix_1.6-5 WGCNA_1.72-5 fastcluster_1.2.6
## [10] dynamicTreeCut_1.63-1 janitor_2.2.0 lubridate_1.9.3
## [13] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [16] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [19] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] minqa_1.2.7 colorspace_2.1-1 rjson_0.2.21
## [4] snakecase_0.11.1 htmlTable_2.4.3 XVector_0.34.0
## [7] GlobalOptions_0.1.2 base64enc_0.1-3 clue_0.3-65
## [10] rstudioapi_0.16.0 DT_0.33 bit64_4.0.5
## [13] AnnotationDbi_1.56.2 fansi_1.0.6 codetools_0.2-18
## [16] splines_4.1.2 doParallel_1.0.17 cachem_1.1.0
## [19] impute_1.68.0 knitr_1.48 Formula_1.2-5
## [22] jsonlite_1.8.8 nloptr_2.1.1 Cairo_1.6-2
## [25] cluster_2.1.2 GO.db_3.14.0 png_0.1-8
## [28] compiler_4.1.2 httr_1.4.7 backports_1.5.0
## [31] fastmap_1.2.0 cli_3.6.3 htmltools_0.5.8.1
## [34] tools_4.1.2 gtable_0.3.5 glue_1.7.0
## [37] GenomeInfoDbData_1.2.7 reshape2_1.4.4 Rcpp_1.0.13
## [40] Biobase_2.54.0 jquerylib_0.1.4 vctrs_0.6.5
## [43] Biostrings_2.62.0 preprocessCore_1.61.0 nlme_3.1-153
## [46] iterators_1.0.14 crosstalk_1.2.1 insight_0.20.2
## [49] xfun_0.46 timechange_0.3.0 lifecycle_1.0.4
## [52] zlibbioc_1.40.0 MASS_7.3-60.0.1 scales_1.3.0
## [55] hms_1.1.3 parallel_4.1.2 yaml_2.3.10
## [58] memoise_2.0.1 gridExtra_2.3 sass_0.4.9
## [61] rpart_4.1-15 stringi_1.8.4 RSQLite_2.3.7
## [64] highr_0.11 S4Vectors_0.32.4 foreach_1.5.2
## [67] checkmate_2.3.2 BiocGenerics_0.40.0 boot_1.3-28
## [70] shape_1.4.6.1 GenomeInfoDb_1.30.1 rlang_1.1.4
## [73] pkgconfig_2.0.3 matrixStats_1.3.0 bitops_1.0-8
## [76] evaluate_0.24.0 lattice_0.20-45 htmlwidgets_1.6.4
## [79] bit_4.0.5 tidyselect_1.2.1 plyr_1.8.9
## [82] magrittr_2.0.3 R6_2.5.1 magick_2.8.4
## [85] IRanges_2.28.0 generics_0.1.3 Hmisc_5.1-3
## [88] DBI_1.2.3 pillar_1.9.0 foreign_0.8-81
## [91] withr_3.0.0 survival_3.7-0 KEGGREST_1.34.0
## [94] RCurl_1.98-1.16 nnet_7.3-16 crayon_1.5.3
## [97] utf8_1.2.4 tzdb_0.4.0 rmarkdown_2.27
## [100] GetoptLong_1.0.5 data.table_1.15.4 blob_1.2.4
## [103] digest_0.6.36 numDeriv_2016.8-1.1 stats4_4.1.2
## [106] munsell_0.5.1 bslib_0.8.0